package com.liam.es.util;

import com.huaban.analysis.jieba.JiebaSegmenter;
import org.springframework.core.io.ClassPathResource;
import org.springframework.core.io.Resource;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

public class BM25Test {

    public static void main(String[] args) {
        BM25Test bm25Test = new BM25Test();
        try {
            bm25Test.test();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    public void test() throws IOException {
        BM25 bm25 = new BM25(1.2, 0.75);

        String keywords = "公租房消息赶紧看";
        keywords = "租房";
        keywords = "quick brown jumps over";

        List<String> docs = new ArrayList<String>();
//        docs.add("深圳保障房计划给出最新公租房、安居房消息！没房的赶紧来看！");
//        docs.add("在深圳，有多少人每个月最大的一笔支出就是房租。所以大家都挺关心公租房消息的，毕竟公租房能让房租这笔支出少一些又或者是期待安居房能让自己有点买房的机会。");
//        docs.add("深圳的保障房工作近几年进展就很不错。从最初的廉租房、公租房、经济适用住房，发展到今天的公租房、安居房和人才住房。");
//        docs.add("保障群体从最初的户籍低收入家庭，扩展到现在的户籍中低收入家庭、人才家庭，以及为城市提供基本公共服务的公交司机、环卫工人和先进制造业职工等群体");
//        docs.add("好消息，新版租房合同来袭，在深圳租房的你有福了！");
        docs.add("The quick brown fox jumps over the quick dog");
//        docs.add("The quick brown fox");
//        docs.add("The quick brown fox jumps over the lazy dog");


        JiebaSegmenter segmenter = new JiebaSegmenter();

        Map<String, Double> idfMap = new HashMap<String, Double>();
//        loadIDFMap(idfMap, this.getClass().getResourceAsStream("idf_dict.txt"));

        loadIDFMap(idfMap,new ClassPathResource("dict.txt").getInputStream());

//        "title" : "The quick brown fox jumps over the quick dog"
//        "_score" : 0.4425555,
//
//                "title" : "The quick brown fox"
//        "_score" : 0.423274,
//
//                "title" : "The quick brown fox jumps over the lazy dog"
//        "_score" : 0.30818442,

        for (String doc : docs) {

//            List<String> list = segmenter.sentenceProcess(keywords);
//            double sum = 0;
//            int count = 0;
//            for (String s : list){
//                sum += bm25.cal(s, doc, docs, idfMap);
//                count ++ ;
//            }
            System.out.println(keywords + "-bm25计算：" + bm25.cal(keywords, doc, docs, idfMap));
        }
    }

    private static void loadIDFMap(Map<String, Double> map, InputStream in) {
        BufferedReader bufr;
        try {
            bufr = new BufferedReader(new InputStreamReader(in));
            String line = null;
            while ((line = bufr.readLine()) != null) {
                String[] kv = line.trim().split(" ");
                map.put(kv[0], Double.parseDouble(kv[1]));
            }
            try {
                bufr.close();
            } catch (IOException e) {
                e.printStackTrace();
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}
